{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "#神经网络的计算方式\n",
    "#第一层\n",
    "#神经元1:z1 = w1*x1 + w2*x2 + ... + w12*x12 + b\n",
    "#神经元2:z2 = 同上\n",
    "#神经元3:z3 = 同上\n",
    "\n",
    "#神经元1:a1 = a(z1)\n",
    "#神经元2:a2 = 同上\n",
    "#神经元3:a3 = 同上\n",
    "\n",
    "#第二层\n",
    "#神经元1:z1 = w1*a1 + w2*a2 + w3*a3 + b\n",
    "#神经元2:z2 = 同上\n",
    "\n",
    "#神经元1:a1 = a(z1)\n",
    "#神经元2:a2 = 同上\n",
    "\n",
    "#误差函数\n",
    "#C = 1/2 * [(y1 - a1)^2 + (y2 - a2)^2] <- 对所有x求和\n",
    "\n",
    "#直接对所有变量求导是很复杂的,比如要对第一层神经元1的w1求导\n",
    "#dC/dw1(第一层神经元1) = dC/da * da/dz * dz/da * (进入第一层)da/dz * dz/dw + (第二层神经元2同样的式子再来一遍)\n",
    "#还要对上面这个式子,以所有的x求和\n",
    "\n",
    "#定义符号delta = dC/dz\n",
    "#因为 z = w1*x1 + ... + w12*x12 + b\n",
    "#所以 dz/dw = x\n",
    "#所以 dz/db = 1\n",
    "\n",
    "#因为 dC/dw = dC/dz * dz/dw\n",
    "#所以 dC/dw = delta * x\n",
    "\n",
    "#因为 dC/db = dC/dz * dz/db\n",
    "#所以 dC/db = delta\n",
    "\n",
    "#可以直接求出第二层的delta\n",
    "#delta = dC/dz\n",
    "#delta = dC/da * da/dz\n",
    "#因为 C = 1/2 * [(y1 - a1)^2 + (y2 - a2)^2] <- 对所有x求和\n",
    "#所以 dC/da = a - y (dC/da1 = a1 - y1 , dC/da2 = a2 - y2)\n",
    "#所以 delta = (a - y) * da/dz\n",
    "#sigmoid函数求导\n",
    "#da/dz = a(z) * (1 - a(z))\n",
    "#所以 delta = (a - y) * a(z) * (1 - a(z))\n",
    "\n",
    "#求第一层的delta\n",
    "#delta = dC/dz(第二层) * dz/da * da/dz + dC/dz(第二层第二个神经元) * dz/da * da/dz\n",
    "#因为 dC/dz(第二层) = delta(第二层)\n",
    "#因为 dz/da = w (这里的z是第二层的,a是第一层的,这个w也是第二层的)\n",
    "#因为 da/dz = a(z) * (1 - a(z))\n",
    "#所以 delta = delta(第二层) * w(第二层) * [a(z) * (1 - a(z))] + (第二层第二个神经元同样的式子)\n",
    "#所以 delta = [delta(第二层) * w(第二层) + delta(第二层第二个) * w(第二层第二个)] * [a(z) * (1 - a(z))]\n",
    "\n",
    "#到这里,可以总结出一般的delta的计算公式\n",
    "#delta = [delta(上一层) * w(上一层) <- 对上一层所有神经元求和] * [a(z) * (1 - a(z))]\n",
    "\n",
    "#求梯度\n",
    "#dC/dw = delta * x <- 对所有x求和\n",
    "#dC/db = delta <- 对所有x求和"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(array([[1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1],\n",
       "        [0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1],\n",
       "        [1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1],\n",
       "        [1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0],\n",
       "        [1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1]]),\n",
       " array([[1, 0],\n",
       "        [1, 0],\n",
       "        [1, 0],\n",
       "        [1, 0],\n",
       "        [1, 0]]),\n",
       " (64, 12),\n",
       " (64, 2))"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import numpy as np\n",
    "\n",
    "\n",
    "#加载数据\n",
    "def load_data():\n",
    "    with open('数字01.txt') as fr:\n",
    "        lines = fr.readlines()\n",
    "\n",
    "    x = np.empty((len(lines), 12), dtype=int)\n",
    "    y = np.empty((len(lines), 2), dtype=int)\n",
    "\n",
    "    for i in range(len(lines)):\n",
    "        line = lines[i].strip().split(',')\n",
    "        x[i] = line[:12]\n",
    "        y[i] = [1, 0] if line[12] == '0' else [0, 1]\n",
    "\n",
    "    return x, y\n",
    "\n",
    "\n",
    "x, y = load_data()\n",
    "x[:5], y[:5], x.shape, y.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "#定义常量\n",
    "N, M = x.shape\n",
    "\n",
    "lr = 0.2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "#定义神经元对象\n",
    "class Neural:\n",
    "    def __init__(self, w, b):\n",
    "        self.w = w\n",
    "        self.b = b\n",
    "\n",
    "    def run(self, xi):\n",
    "        #线性计算\n",
    "        self.z = np.multiply(xi, self.w).sum() + self.b\n",
    "\n",
    "        #激活函数,sigmoid\n",
    "        self.a = 1 / (1 + np.exp(-self.z))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "#定义网络层对象\n",
    "class Layer:\n",
    "    def __init__(self, ns):\n",
    "        #神经元列表\n",
    "        self.ns = ns\n",
    "        self.out = None\n",
    "\n",
    "    #运行神经元,并记录运行结果\n",
    "    def run(self, xi):\n",
    "        for n in self.ns:\n",
    "            n.run(xi)\n",
    "\n",
    "        out = []\n",
    "        for i in range(len(self.ns)):\n",
    "            out.append(self.ns[i].a)\n",
    "        self.out = np.array(out)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0.00420612, 0.88058693, 0.17450929])"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#定义第一层神经网络\n",
    "ns = []\n",
    "\n",
    "w = np.array([\n",
    "    0.490, 0.348, 0.073, 0.837, -0.071, -3.617, -0.536, -0.023, -1.717, -1.456,\n",
    "    -0.556, 0.852\n",
    "])\n",
    "ns.append(Neural(w, b=-0.185))\n",
    "\n",
    "w = np.array([\n",
    "    0.442, -0.537, 1.008, 1.072, -0.733, 0.823, -0.453, -0.014, -0.027, -0.427,\n",
    "    1.876, -2.305\n",
    "])\n",
    "\n",
    "ns.append(Neural(w, b=0.526))\n",
    "\n",
    "w = np.array([\n",
    "    0.654, -1.389, 1.246, 0.057, -0.183, -0.743, -0.461, 0.331, 0.449, -1.296,\n",
    "    1.569, -0.471\n",
    "])\n",
    "ns.append(Neural(w, b=-1.169))\n",
    "\n",
    "layer_1 = Layer(ns)\n",
    "\n",
    "#运行第一层神经网络\n",
    "layer_1.run(x[0])\n",
    "\n",
    "layer_1.out"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0.32646968, 0.14142001])"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#定义第二层神经网络\n",
    "ns = []\n",
    "\n",
    "w = np.array([0.388, 0.803, 0.029])\n",
    "ns.append(Neural(w, b=-1.438))\n",
    "\n",
    "w = np.array([0.025, -0.790, 1.553])\n",
    "ns.append(Neural(w, b=-1.379))\n",
    "\n",
    "layer_2 = Layer(ns)\n",
    "\n",
    "#运行第二层神经网络\n",
    "layer_2.run(layer_1.out)\n",
    "\n",
    "layer_2.out"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(array([[0.00420612, 0.88058693, 0.17450929],\n",
       "        [0.00258098, 0.82577863, 0.09903438],\n",
       "        [0.00391117, 0.72908792, 0.05732418],\n",
       "        [0.0017985 , 0.98665265, 0.25293871],\n",
       "        [0.01779295, 0.91871392, 0.43585542]]),\n",
       " array([[0.32646968, 0.14142001],\n",
       "        [0.31625713, 0.13267627],\n",
       "        [0.29956661, 0.1340218 ],\n",
       "        [0.34577006, 0.14609076],\n",
       "        [0.33609856, 0.19349542]]))"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#先计算所有x的输出矩阵\n",
    "def get_out():\n",
    "    out_1 = np.zeros((N, 3))\n",
    "    out_2 = np.zeros((N, 2))\n",
    "    for i in range(N):\n",
    "        layer_1.run(x[i])\n",
    "        layer_2.run(layer_1.out)\n",
    "\n",
    "        out_1[i] = layer_1.out\n",
    "        out_2[i] = layer_2.out\n",
    "    return out_1, out_2\n",
    "\n",
    "\n",
    "out_1, out_2 = get_out()\n",
    "out_1[:5], out_2[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "-6"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#sigmoid函数求导 根据公式 da/dz = a(z) * (1 - a(z))\n",
    "def da(a):\n",
    "    return a * (1 - a)\n",
    "\n",
    "\n",
    "da(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[-0.14810072,  0.01717127],\n",
       "       [-0.14785157,  0.01526749],\n",
       "       [-0.14696946,  0.01555457],\n",
       "       [-0.1479954 ,  0.01822457],\n",
       "       [-0.14814052,  0.03019592]])"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#可以直接求出第二层的delta\n",
    "#delta = dC/dz\n",
    "#delta = dC/da * da/dz\n",
    "#因为 C = 1/2 * [(y1 - a1)^2 + (y2 - a2)^2] <- 对所有x求和\n",
    "#所以 dC/da = a - y (dC/da1 = a1 - y1 , dC/da2 = a2 - y2)\n",
    "#所以 delta = (a - y) * da/dz\n",
    "#sigmoid函数求导\n",
    "#da/dz = a(z) * (1 - a(z))\n",
    "#所以 delta = (a - y) * a(z) * (1 - a(z))\n",
    "def get_delta_2():\n",
    "    delta_2 = np.zeros((N, 2))\n",
    "    for i in range(N):\n",
    "        delta_2[i, 0] = (out_2[i, 0] - y[i, 0]) * da(out_2[i, 0])\n",
    "        delta_2[i, 1] = (out_2[i, 1] - y[i, 1]) * da(out_2[i, 1])\n",
    "    return delta_2\n",
    "\n",
    "\n",
    "delta_2 = get_delta_2()\n",
    "delta_2[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[-0.00023888, -0.01393182,  0.00322283],\n",
       "       [-0.0001467 , -0.01881598,  0.00173302],\n",
       "       [-0.00022064, -0.02573759,  0.00107504],\n",
       "       [-0.00010227, -0.00175463,  0.00453711],\n",
       "       [-0.00099132, -0.01066498,  0.01047427]])"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#求第一层的delta\n",
    "#delta = dC/dz(第二层) * dz/da * da/dz + dC/dz(第二层第二个神经元) * dz/da * da/dz\n",
    "#因为 dC/dz(第二层) = delta(第二层)\n",
    "#因为 dz/da = w (这里的z是第二层的,a是第一层的,这个w也是第二层的)\n",
    "#因为 da/dz = a(z) * (1 - a(z))\n",
    "#所以 delta = delta(第二层) * w(第二层) * [a(z) * (1 - a(z))] + (第二层第二个神经元同样的式子)\n",
    "#所以 delta = [delta(第二层) * w(第二层) + delta(第二层第二个) * w(第二层第二个)] * [a(z) * (1 - a(z))]\n",
    "def get_delta_1():\n",
    "    delta_1 = np.zeros((N, 3))\n",
    "    for i in range(N):\n",
    "        delta_1[i, 0] = (delta_2[i, 0] * layer_2.ns[0].w[0] +\n",
    "                         delta_2[i, 1] * layer_2.ns[1].w[0]) * da(out_1[i, 0])\n",
    "\n",
    "        delta_1[i, 1] = (delta_2[i, 0] * layer_2.ns[0].w[1] +\n",
    "                         delta_2[i, 1] * layer_2.ns[1].w[1]) * da(out_1[i, 1])\n",
    "\n",
    "        delta_1[i, 2] = (delta_2[i, 0] * layer_2.ns[0].w[2] +\n",
    "                         delta_2[i, 1] * layer_2.ns[1].w[2]) * da(out_1[i, 2])\n",
    "\n",
    "    return delta_1\n",
    "\n",
    "\n",
    "delta_1 = get_delta_1()\n",
    "delta_1[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(array([[ 4.05233159e-02,  6.83227585e-02, -2.23664065e-02,\n",
       "         -1.46557740e-02,  1.03294660e-01, -1.32018900e-02,\n",
       "         -1.38212794e-02,  9.30558590e-02, -2.18880163e-02,\n",
       "         -3.32488398e-05,  8.05971291e-02, -1.08854079e-02],\n",
       "        [-1.88945094e-02,  1.92699466e-01, -2.94810051e-01,\n",
       "         -4.80701480e-01,  5.88722435e-01, -3.94014844e-01,\n",
       "         -5.33813037e-01,  6.44996571e-01, -4.12351372e-01,\n",
       "         -2.86739148e-01,  1.86757266e-01, -3.95883564e-01],\n",
       "        [-4.91294388e-01, -7.93739379e-01,  3.75313285e-02,\n",
       "          1.64883495e-02, -9.59411464e-01, -8.56413832e-02,\n",
       "          1.59010319e-02, -9.22133439e-01, -1.28516297e-01,\n",
       "         -1.16847282e-01, -8.89089583e-01, -1.62704720e-01]]),\n",
       " array([ 0.08252767,  0.1210072 , -0.93234667]))"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#求梯度\n",
    "#dC/dw = delta * x <- 对所有x求和\n",
    "#dC/db = delta <- 对所有x求和\n",
    "def get_gradient_1():\n",
    "    gradient_w = np.zeros((3, 12))\n",
    "    gradient_b = np.zeros(3)\n",
    "    for i in range(N):\n",
    "        gradient_w[0] += x[i] * delta_1[i, 0]\n",
    "        gradient_w[1] += x[i] * delta_1[i, 1]\n",
    "        gradient_w[2] += x[i] * delta_1[i, 2]\n",
    "\n",
    "        gradient_b += delta_1[i]\n",
    "\n",
    "    return gradient_w, gradient_b\n",
    "\n",
    "\n",
    "gradient_w_1, gradient_b_1 = get_gradient_1()\n",
    "gradient_w_1, gradient_b_1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(array([[ 0.54097704, -1.9398653 , -0.13472918],\n",
       "        [-1.15723532, -2.10601752, -1.02770687]]),\n",
       " array([-2.49194683, -3.2636605 ]))"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#求梯度\n",
    "#dC/dw = delta * x <- 对所有x求和\n",
    "#dC/db = delta <- 对所有x求和\n",
    "def get_gradient_2():\n",
    "    gradient_w = np.zeros((2, 3))\n",
    "    gradient_b = np.zeros(2)\n",
    "    for i in range(N):\n",
    "        gradient_w[0] += out_1[i] * delta_2[i, 0]\n",
    "        gradient_w[1] += out_1[i] * delta_2[i, 1]\n",
    "\n",
    "        gradient_b += delta_2[i]\n",
    "\n",
    "    return gradient_w, gradient_b\n",
    "\n",
    "\n",
    "gradient_w_2, gradient_b_2 = get_gradient_2()\n",
    "gradient_w_2, gradient_b_2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(array([ 0.48189534,  0.33433545,  0.07747328,  0.83993115, -0.09165893,\n",
       "        -3.61435962, -0.53323574, -0.04161117, -1.7126224 , -1.45599335,\n",
       "        -0.57211943,  0.85417708]),\n",
       " -0.2015055348055877,\n",
       " array([0.27980459, 1.19097306, 0.05594584]),\n",
       " -0.9396106341633224)"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#更新参数\n",
    "def update():\n",
    "    for i in range(3):\n",
    "        layer_1.ns[i].w -= gradient_w_1[i] * lr\n",
    "        layer_1.ns[i].b -= gradient_b_1[i] * lr\n",
    "\n",
    "    for i in range(2):\n",
    "        layer_2.ns[i].w -= gradient_w_2[i] * lr\n",
    "        layer_2.ns[i].b -= gradient_b_2[i] * lr\n",
    "\n",
    "\n",
    "update()\n",
    "\n",
    "layer_1.ns[0].w, layer_1.ns[0].b, layer_2.ns[0].w, layer_2.ns[0].b"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(array([ 0.43966452,  0.79168649, -0.11471806,  0.85896993,  0.30173189,\n",
       "        -3.69998133, -0.48450415,  0.31631447, -1.93983601, -1.43356086,\n",
       "        -0.11891537,  0.6508831 ]),\n",
       " 0.25076062113669856,\n",
       " array([-1.31244417,  3.5897345 , -3.05645996]),\n",
       " -0.32577782301713837)"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#批量训练\n",
    "for _ in range(49):\n",
    "    out_1, out_2 = get_out()\n",
    "    \n",
    "    delta_2 = get_delta_2()\n",
    "    delta_1 = get_delta_1()\n",
    "    \n",
    "    gradient_w_1, gradient_b_1 = get_gradient_1()\n",
    "    gradient_w_2, gradient_b_2 = get_gradient_2()\n",
    "    \n",
    "    update()\n",
    "    \n",
    "layer_1.ns[0].w, layer_1.ns[0].b, layer_2.ns[0].w, layer_2.ns[0].b"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1.0"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#测试\n",
    "correct = 0\n",
    "for i in range(N):\n",
    "    pred = out_2[i].argmax()\n",
    "\n",
    "    if pred == y[i].argmax():\n",
    "        correct += 1\n",
    "\n",
    "correct / N"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
